Problems, potential problems and caveats
If we look by PUBLIC_HEALTH_REGION then we’ll need to clean up that data. there are NA’s. PROVIDER_NAME may change over time. “CHRISTUS Southeast Texas - St Elizabeth” is an example.
Working out Cesarean delivery rates by hospital (and later quarter)
library(tidyverse)
library(janitor)
library(readxl)
library(DT)
library(plotly)
library(lubridate)
deliveries <- read_rds("data-processed/ahrq_del_ucmp.rds")
deliveries_cnt <- deliveries %>% nrow()
paste("Total uncomplicated deliveries:", deliveries_cnt)
## [1] "Total uncomplicated deliveries: 993397"
deliveries %>%
select(THCIC_ID, PROVIDER_NAME) %>%
distinct() %>%
arrange(PROVIDER_NAME) %>%
write_csv("data-processed/facility-list.csv")
deliveries <- deliveries %>%
mutate(
YR = substr(DISCHARGE, 1, 4),
YEAR = ymd(YR,truncated = 2L)
)
These are from AHRQ Inpatient Quality Indicator 21 (IQI 21) Cesarean Delivery Rate, Uncomplicated
Number of Cesarean deliveries among cases meeting the inclusion and exclusion rules for the denominator. Cesarean deliveries are identified by either:
Here we create CBIRTH using the first method using MS-DRG.
deliveries <- deliveries %>%
mutate(
CBIRTH = ifelse(
MS_DRG %in% c("765", "766"), "Y", "N"
)
)
Overall Cesarean rate for the data set.
deliveries %>%
tabyl(CBIRTH) %>%
rename(CNT = n, PRCT = percent)%>%
adorn_pct_formatting()
crate_all <- deliveries %>%
group_by(PROVIDER_NAME, CBIRTH) %>%
summarize(CNT = n()) %>%
pivot_wider(names_from = CBIRTH, values_from = CNT) %>%
filter(N >= 100) %>%
mutate(
CRATE = round((Y / (N+Y)) * 100,1)
) %>%
arrange(CRATE %>% desc())
# writing out for use in separate file
crate_all %>% write_rds("data-processed/create_all.rds")
crate_all %>%
datatable()
There are some issues here with PROVIDER_NAME, as perhaps they changed over time. I might need to use the THCIC_ID and then use the 2018 facility names.
crate_q <- deliveries %>%
group_by(DISCHARGE, PROVIDER_NAME, CBIRTH) %>%
summarize(CNT = n()) %>%
pivot_wider(names_from = CBIRTH, values_from = CNT) %>%
filter(N >= 100) %>%
mutate(
CRATE = round((Y / (N+Y)) * 100,1)
) %>%
arrange(CRATE %>% desc())
crate_y <- deliveries %>%
group_by(YR, PROVIDER_NAME, CBIRTH) %>%
summarize(CNT = n()) %>%
pivot_wider(names_from = CBIRTH, values_from = CNT) %>%
filter(N >= 100) %>%
mutate(
CRATE = round((Y / (N+Y)) * 100,1)
) %>%
arrange(CRATE %>% desc())
crate_y
Make a list of specific hospitals we’ll track. I’m noticing some of these don’t show in plots because they have fewer than 100 births.
st_list <- c(
"Doctors Hospital-Laredo",
"Laredo Medical Center",
"Doctors Hospital-Renaissance",
"Edinburg Regional Medical Center",
"Valley Regional Medical Center",
"Citizens Medical Center",
"University Hospital",
"Yoakum Community Hospital",
"Yoakum County Hospital",
"DeTar Hospital-Navarro",
"DeTar Hospital-North"
)
crate_q_data <- crate_q %>%
filter(
PROVIDER_NAME %in% st_list
) %>%
select(DISCHARGE, PROVIDER_NAME, CRATE)
crate_q_plot <- crate_q_data %>%
ggplot(aes(DISCHARGE, CRATE)) +
geom_line(aes(group = PROVIDER_NAME, color = PROVIDER_NAME)) +
expand_limits(y = c(0,60)) +
theme(legend.position="bottom", legend.box = "vertical") +
guides(col = guide_legend(nrow = 5)) +
labs(title = "Cesarean Rate for S. Texas hospitals", x = "Quarter of discharge", y = "Percent of deliveries as Cesarean")
crate_q_plot
Let’s try this using plotly.
this is not figured out yet.
crate_y_data <- crate_y %>%
filter(
PROVIDER_NAME %in% st_list
)
crate_y_data %>%
plot_ly(
x = ~YR,
y = ~CRATE,
color = ~PROVIDER_NAME,
type = "scatter",
mode = "lines"
) %>%
layout(title = "Cesareans by year")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
crate_y_plot <- crate_y_data %>%
ggplot(aes(YR, CRATE)) +
geom_line(aes(group = PROVIDER_NAME, color = PROVIDER_NAME)) +
expand_limits(y = c(0,60)) +
theme(legend.position="bottom", legend.box = "vertical") +
guides(col = guide_legend(nrow = 6)) +
labs(title = "Cesarean Rate for S. Texas hospitals", x = "Year of discharge", y = "Percent of deliveries as Cesarean")
crate_y_plot
counties <- read_excel("resources/PHR_MSA_County_masterlist.xlsx") %>% clean_names()
To be thorough, I would like to create a new Cesarean measure using the second method described by AHRQ:
This is a little more involved and I haven’t quite figured it out yet.
prcsecp <- c(
"10D00Z0", "10D00Z1", "10D00Z2"
)
prcse2p <- c(
"10A00ZZ", "10A03ZZ", "10A04ZZ"
)
# vector of procedure columns
surg_proc <- deliveries %>%
select(contains("SURG_PROC_CODE")) %>% names()
# filters deliveries by surg_proc with prcsecp values
deliveries %>%
filter_at(
vars(surg_proc),
any_vars(. %in% prcsecp)
) %>%
select(surg_proc) %>%
head()
# looks through only one column for precsecp and sets it
# but I need to look through all of them.
deliveries %>%
mutate(
CBIRTH2 = if_else(
PRINC_SURG_PROC_CODE %in% prcsecp,
"Y", "F"
)
) %>%
count(CBIRTH2)
# deliveries %>%
# mutate(
# CBIRTH2 = if_else(
# surg_proc %in% prcsecp, "Y", "F"
# )
# )
beepr::beep(4)